from pymongo import MongoClient
from sklearn.model_selection import train_test_split
import numpy as np
# Connect to the MongoDB server on the local default host and pick the source database.
client = MongoClient(port=27017)
db = client['tiktok_p2']

# Suffix appended to the name of every output file.
version_num = 't1'
# Collections whose documents are exported.
collist = ['gaminglife']
# Number of per-frame entries ('f0' .. 'f9') read from each document.
num_frames = 10

# One entry per document; each entry lists that document's per-frame values
# in frame order. Both lists are filled in lockstep, so index k in one
# refers to the same document as index k in the other.
imglist = []
audiolist = []
# NOTE: the per-frame 'text' field is currently not exported.
for col in collist:
    # Only 'frame_level' is read below, so project it instead of fetching
    # every field of every document.
    for obj in db[col].find({}, {'frame_level': 1}):
        # Raises KeyError if a document lacks any of the expected frames.
        frames = [obj['frame_level']['f' + str(i)] for i in range(num_frames)]
        imglist.append([frame['img'] for frame in frames])
        audiolist.append([frame['audio'] for frame in frames])

# Split both modalities in ONE call: the same shuffled indices are applied to
# every list passed in, so row k of the image split and row k of the audio
# split always come from the same document, and inputs of different lengths
# are rejected instead of being silently misaligned.
train_img, test_img, train_audio, test_audio = train_test_split(
    imglist, audiolist, test_size=0.2, random_state=42
)

# Destination prefix for all exported arrays (np.save appends '.npy').
out_prefix = 'E:\\data_p2\\'
# NOTE: the text modality is currently not exported.
for stem, samples in (
    ('image_p2_train_', train_img),
    ('audio_p2_train_', train_audio),
    ('image_p2_test_', test_img),
    ('audio_p2_test_', test_audio),
):
    np.save(out_prefix + stem + version_num, np.array(samples))
